0: Preparation
Defining the input and output files
Loading libraries
library(knitr)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.2
Causative variant
Variant fixation
Fixation probability
Fixation time
# Function to determine the number of rows until fixation is reached
time_to_fixation <- function(data, threshold = 0.9) {
# Find the index of the first row where the allele frequency is 0.9 or higher
fixation_index <- which(data$allele_frequency >= threshold)[1]
# Return the number of rows until fixation is reached
return(fixation_index - 1)
}
Summary - Variant fixation
| 1 |
s=0.05 |
0.2 |
33.80 |
27 |
39 |
| 3 |
s=0.2 |
16.3 |
32.40 |
22 |
39 |
| 2 |
s=0.1 |
1.5 |
31.85 |
18 |
39 |
| 4 |
s=0.3 |
27.8 |
27.55 |
18 |
39 |
| 5 |
s=0.4 |
40.8 |
20.95 |
15 |
29 |
| 6 |
s=0.5 |
40.0 |
16.65 |
10 |
26 |
| 7 |
s=0.6 |
35.7 |
11.45 |
8 |
16 |
| 8 |
s=0.7 |
46.5 |
10.25 |
7 |
13 |
| 9 |
s=0.8 |
45.5 |
7.45 |
4 |
10 |
Causative variant windows
Variant position
Window lengths
Summary - Causative variant windows
| 1 |
s=0.05 |
3.655 |
71.0 |
32 |
100 |
71.9 |
| 3 |
s=0.2 |
4.430 |
78.9 |
34 |
100 |
79.3 |
| 4 |
s=0.3 |
4.435 |
83.1 |
38 |
100 |
84.0 |
| 5 |
s=0.4 |
4.625 |
78.1 |
32 |
100 |
78.2 |
| 2 |
s=0.1 |
4.775 |
73.2 |
12 |
100 |
73.9 |
| 6 |
s=0.5 |
5.040 |
92.9 |
56 |
100 |
93.6 |
| 7 |
s=0.6 |
5.875 |
87.2 |
48 |
100 |
88.0 |
| 8 |
s=0.7 |
6.220 |
93.2 |
36 |
100 |
94.4 |
| 9 |
s=0.8 |
7.895 |
88.9 |
28 |
100 |
89.9 |
Standard Error Confidence interval function
Confidence level <=> konfidensgrad
# Function to calculate standard error and its confidence interval
standard_error_confidence_interval_fun <- function(observed_data, confidence_level = 0.95) {
# Calculate standard error
n <- length(observed_data)
standard_deviation <- sd(observed_data)
standard_error <- standard_deviation / sqrt(n - 1)
# Calculate confidence interval based on standard error
alpha <- (1 - confidence_level) / 2
margin_of_error <- qnorm(1 - alpha) * standard_error
mean_estimate <- mean(observed_data)
# Calculate the percentiles for the confidence interval
confidence_interval_lower_bound <- mean_estimate - margin_of_error # 2.5th percentile (2σ)
confidence_interval_upper_bound <- mean_estimate + margin_of_error # 97.5th percentile (2σ)
# Return confidence interval
return(c(confidence_interval_lower_bound, confidence_interval_upper_bound))
}
# # Function to calculate bootstrap confidence intervals
# standard_error_confidence_interval_fun <- function(observed_data, n_bootstraps = 1000, confidence_level = 0.95) {
#
# # Function to calculate the mean for each bootstrap sample
# compute_bootstrap_mean_fun <- function(observed_data_urn) {
# bootstrap_dataset <- sample(observed_data_urn, replace = TRUE)
# bootstrap_estimate <- mean(bootstrap_dataset)
# return(bootstrap_estimate)
# }
#
# # Perform bootstrap resampling
# bootstrap_point_estimates <- replicate(n_bootstraps, compute_bootstrap_mean_fun(observed_data))
#
# # Calculate the percentiles for the confidence interval
# alpha <- (1 - confidence_level) / 2
# confidence_interval_lower_bound <- quantile(bootstrap_point_estimates, alpha) # 2.5th percentile
# confidence_interval_upper_bound <- quantile(bootstrap_point_estimates, 1 - alpha) # 97.5th percentile
#
# # Return the confidence interval
# return(c(confidence_interval_lower_bound, confidence_interval_upper_bound))
# }
1: ROH-Frequency
1.1 : Autosome ROH-frequencies
1.1.1 : Empirical - ROH frequency
1.1.2: Neutral Model - ROH frequency
1.1.3: Selection Model
1.1.4 Summary - ROH-hotspot threshold
## ROH-hotspot selection testing results://n
| Neutral |
40.0 |
| s=0.1 |
42.6 |
| s=0.3 |
45.1 |
| s=0.05 |
46.6 |
| s=0.2 |
47.0 |
| s=0.4 |
47.0 |
| s=0.6 |
52.0 |
| s=0.5 |
54.2 |
| s=0.7 |
58.2 |
| s=0.8 |
60.6 |
| Empirical |
75.0 |
1.2 ROH-hotspots - ROH Frequency and Length
2: Inbreeding coefficient
2.1 Empirical data

## Overall Average Avg_F_ROH for german_shepherd : 0.2753012 //n
2.2 Neutral Model

## Point estimate F_ROH across all 20 simulations: 0.3231565 //n
## [1] "Bootstrap 95% Confidence Interval: [0.302707382298313, 0.343605637701687]"
2.3 Selection Model

## Point estimate F_ROH across all 20 simulations for selection_model_s005_chr3 : 0.3789376 //n[1] "Bootstrap 95% Confidence Interval: [0.356936709259486, 0.400938510740514]"

## Point estimate F_ROH across all 20 simulations for selection_model_s01_chr3 : 0.3475749 //n[1] "Bootstrap 95% Confidence Interval: [0.329352061370301, 0.365797778629699]"

## Point estimate F_ROH across all 20 simulations for selection_model_s02_chr3 : 0.3852102 //n[1] "Bootstrap 95% Confidence Interval: [0.360506697169921, 0.409913642830079]"

## Point estimate F_ROH across all 20 simulations for selection_model_s03_chr3 : 0.382973 //n[1] "Bootstrap 95% Confidence Interval: [0.356057264547997, 0.409888755452003]"

## Point estimate F_ROH across all 20 simulations for selection_model_s04_chr3 : 0.4022861 //n[1] "Bootstrap 95% Confidence Interval: [0.379804687863133, 0.424767512136866]"

## Point estimate F_ROH across all 20 simulations for selection_model_s05_chr3 : 0.438943 //n[1] "Bootstrap 95% Confidence Interval: [0.407964226284795, 0.469921813715205]"

## Point estimate F_ROH across all 20 simulations for selection_model_s06_chr3 : 0.4301045 //n[1] "Bootstrap 95% Confidence Interval: [0.398231547108479, 0.461977472891521]"

## Point estimate F_ROH across all 20 simulations for selection_model_s07_chr3 : 0.4466082 //n[1] "Bootstrap 95% Confidence Interval: [0.419672277441459, 0.473544062558542]"

## Point estimate F_ROH across all 20 simulations for selection_model_s08_chr3 : 0.4687358 //n[1] "Bootstrap 95% Confidence Interval: [0.432117907312471, 0.505353732687529]"
2.4 F_ROH summary
| Empirical |
0.275 |
NA |
NA |
| Neutral |
0.323 |
0.303 |
0.344 |
| s=0.1 |
0.348 |
0.329 |
0.366 |
| s=0.05 |
0.379 |
0.357 |
0.401 |
| s=0.3 |
0.383 |
0.356 |
0.410 |
| s=0.2 |
0.385 |
0.361 |
0.410 |
| s=0.4 |
0.402 |
0.380 |
0.425 |
| s=0.6 |
0.430 |
0.398 |
0.462 |
| s=0.5 |
0.439 |
0.408 |
0.470 |
| s=0.7 |
0.447 |
0.420 |
0.474 |
| s=0.8 |
0.469 |
0.432 |
0.505 |
2.4.1 Visualizing F_ROH

## Models where empirical F_ROH is within CI:
## character(0)
##
## Models where empirical F_ROH is outside CI:
## [1] "s=0.05" "s=0.1" "s=0.2" "s=0.3" "s=0.4" "s=0.5" "s=0.6"
## [8] "s=0.7" "s=0.8" "Neutral"
3: Expected Heterozygosity
3.1 Empirical data
3.2 Neutral Model
3.3 Selection Model
## Uncommented because change of analysis
3.4 Causative Variant Window
## Point estimate H_e across all 20 simulations for s005_chr3 : 0.03835051 //n[1] "Bootstrap 95% Confidence Interval: [0.0286911574700695, 0.0480098532362818]"
## Point estimate H_e across all 20 simulations for s01_chr3 : 0.03615789 //n[1] "Bootstrap 95% Confidence Interval: [0.0226773044355698, 0.0496384684079676]"
## Point estimate H_e across all 20 simulations for s02_chr3 : 0.0249121 //n[1] "Bootstrap 95% Confidence Interval: [0.0147684851908972, 0.0350557107187597]"
## Point estimate H_e across all 20 simulations for s03_chr3 : 0.02055552 //n[1] "Bootstrap 95% Confidence Interval: [0.00764693794749041, 0.0334641051013026]"
## Point estimate H_e across all 20 simulations for s04_chr3 : 0.02878277 //n[1] "Bootstrap 95% Confidence Interval: [0.0129018766653808, 0.044663667853679]"
## Point estimate H_e across all 20 simulations for s05_chr3 : 0.01151372 //n[1] "Bootstrap 95% Confidence Interval: [0.00244507213179341, 0.0205823727315342]"
## Point estimate H_e across all 20 simulations for s06_chr3 : 0.01601255 //n[1] "Bootstrap 95% Confidence Interval: [0.00661909993119089, 0.0254060099286539]"
## Point estimate H_e across all 20 simulations for s07_chr3 : 0.008196857 //n[1] "Bootstrap 95% Confidence Interval: [0.00329575703966468, 0.0130979577437562]"
## Point estimate H_e across all 20 simulations for s08_chr3 : 0.01370687 //n[1] "Bootstrap 95% Confidence Interval: [0.00367441728198638, 0.0237393233295754]"
3.4 Genomewide 5th percentile comparison - Expected Heterozygosity
Summary
| s005_chr3 |
0 |
0 |
0 |
| s01_chr3 |
0 |
0 |
0 |
| s02_chr3 |
0 |
0 |
0 |
| s03_chr3 |
0 |
0 |
0 |
| s04_chr3 |
0 |
0 |
0 |
| s05_chr3 |
0 |
0 |
0 |
| s06_chr3 |
0 |
0 |
0 |
| s07_chr3 |
0 |
0 |
0 |
| s08_chr3 |
0 |
0 |
0 |
| Neutral |
0 |
0 |
0 |
| Empirical |
NA |
NA |
NA |
4: Summary
4.0 General comparison
4.0.1 ROH-hotspot threshold comparison
##
## ROH-hotspot threshold comparison between the different datasets
| Neutral |
40.0 |
| s=0.1 |
42.6 |
| s=0.3 |
45.1 |
| s=0.05 |
46.6 |
| s=0.2 |
47.0 |
| s=0.4 |
47.0 |
| s=0.6 |
52.0 |
| s=0.5 |
54.2 |
| s=0.7 |
58.2 |
| s=0.8 |
60.6 |
| Empirical |
75.0 |
4.0.2 F_ROH comparison
| Empirical |
0.275 |
NA |
NA |
| Neutral |
0.323 |
0.303 |
0.344 |
| s=0.1 |
0.348 |
0.329 |
0.366 |
| s=0.05 |
0.379 |
0.357 |
0.401 |
| s=0.3 |
0.383 |
0.356 |
0.410 |
| s=0.2 |
0.385 |
0.361 |
0.410 |
| s=0.4 |
0.402 |
0.380 |
0.425 |
| s=0.6 |
0.430 |
0.398 |
0.462 |
| s=0.5 |
0.439 |
0.408 |
0.470 |
| s=0.7 |
0.447 |
0.420 |
0.474 |
| s=0.8 |
0.469 |
0.432 |
0.505 |
4.1: Hotspot comparison
4.1.1: Selection test (Sweep test)
## [1] "Selection test results"
## [1] "ROH-hotspot windows with an mean H_e Value lower or equal to the lower confidence interval of the fifth percentile of the neutral model are classified as being under selection"
## [1] "5th percentile of the neutral model is: 0"
| Hotspot_chr3_window_1 |
No |
0.10706 |
| Hotspot_chr3_window_3 |
No |
0.12371 |
| Hotspot_chr3_window_2 |
No |
0.14453 |
| Hotspot_chr17_window_2 |
No |
0.14866 |
| Hotspot_chr17_window_1 |
No |
0.15285 |
| Hotspot_chr19_window_1 |
No |
0.16270 |
## [1] "ROH-hotspots under selection:"
4.1.2: Selection Strength Test (Sweep test)
Hotspot - Causative Window - Comparison
| Hotspot_chr3_window_1 |
10.900 |
81.3 |
0.10706 |
No |
| s=0.8 |
7.895 |
88.9 |
0.01371 |
No |
| s=0.7 |
6.220 |
93.2 |
0.00820 |
No |
| s=0.6 |
5.875 |
87.2 |
0.01601 |
No |
| s=0.5 |
5.040 |
92.9 |
0.01151 |
No |
| s=0.1 |
4.775 |
73.2 |
0.03616 |
No |
| s=0.4 |
4.625 |
78.1 |
0.02878 |
No |
| s=0.3 |
4.435 |
83.1 |
0.02056 |
No |
| s=0.2 |
4.430 |
78.9 |
0.02491 |
No |
| Hotspot_chr19_window_1 |
4.400 |
75.6 |
0.16270 |
No |
| s=0.05 |
3.655 |
71.0 |
0.03835 |
No |
| Hotspot_chr3_window_2 |
3.200 |
76.3 |
0.12371 |
No |
| Hotspot_chr3_window_3 |
2.700 |
77.6 |
0.14453 |
No |
| Hotspot_chr17_window_1 |
2.000 |
76.4 |
0.14866 |
No |
| Hotspot_chr17_window_2 |
0.600 |
76.1 |
0.15285 |
No |
Visualizing and scaling
# Min-max-scaling normalization function, that normalizes a vector of values
min_max_normalization_fun <- function(x) {
return((x - min(x)) / (max(x) - min(x)))
}
# Install and load the scatterplot3d package
# install.packages("scatterplot3d")
library(scatterplot3d)
## Warning: package 'scatterplot3d' was built under R version 4.3.1
### Min max scaling ###
# Normalize Lengths_Mb
Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb_Normalized <- min_max_normalization_fun(Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb)
# Normalize ROH Frequency
Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq_Normalized <- min_max_normalization_fun(Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq)
# Normalize H_e
Hotspots_and_Causative_windows_comparison_sorted$H_e_Normalized <- min_max_normalization_fun(Hotspots_and_Causative_windows_comparison_sorted$H_e)
# Modify the labels by removing "Hotspot_" prefix and "_window" suffix from hotspot models
Hotspots_and_Causative_windows_comparison_sorted$Label <- ifelse(
grepl("Hotspot", Hotspots_and_Causative_windows_comparison_sorted$Model),
gsub("Hotspot_|_window", "", Hotspots_and_Causative_windows_comparison_sorted$Model),
Hotspots_and_Causative_windows_comparison_sorted$Model
)
# Create a new column for identifying the type
Hotspots_and_Causative_windows_comparison_sorted$Type <- ifelse(
grepl("Hotspot", Hotspots_and_Causative_windows_comparison_sorted$Model),
"Hotspot",
"Selection Coefficient"
)
plot_title <- paste("Length And Average ROH % Comparison - ROH Hotspots & Causative Windows")
# Create the 3D scatter plot
s3d <- scatterplot3d(
Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb_Normalized,
Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq_Normalized,
Hotspots_and_Causative_windows_comparison_sorted$H_e_Normalized,
color = ifelse(Hotspots_and_Causative_windows_comparison_sorted$Type == "Hotspot", "red", "blue"),
pch = 19, # Solid circle
xlab = "Normalized Lengths",
ylab = "Normalized Mean ROH Frequency",
zlab = "Normalized H_e",
main = plot_title
)
# Add labels to the points
s3d.coords <- s3d$xyz.convert(
Hotspots_and_Causative_windows_comparison_sorted$Lengths_Mb_Normalized,
Hotspots_and_Causative_windows_comparison_sorted$ROH_Freq_Normalized,
Hotspots_and_Causative_windows_comparison_sorted$H_e_Normalized
)
text(s3d.coords$x, s3d.coords$y,
labels = Hotspots_and_Causative_windows_comparison_sorted$Label,
pos = 3, cex = 0.5) # pos=3 means above


5 Visualizing Expected Heterozygoisty
5.1 Bootstrap CI for mean 5th percentile H_e

## Models where empirical H_e is within CI for Hotspot_chr3_window_1 :
## character(0)
##
## Models where empirical H_e is outside CI for Hotspot_chr3_window_1 :
## character(0)

## Models where empirical H_e is within CI for Hotspot_chr3_window_3 :
## character(0)
##
## Models where empirical H_e is outside CI for Hotspot_chr3_window_3 :
## character(0)

## Models where empirical H_e is within CI for Hotspot_chr3_window_2 :
## character(0)
##
## Models where empirical H_e is outside CI for Hotspot_chr3_window_2 :
## character(0)

## Models where empirical H_e is within CI for Hotspot_chr17_window_2 :
## character(0)
##
## Models where empirical H_e is outside CI for Hotspot_chr17_window_2 :
## character(0)

## Models where empirical H_e is within CI for Hotspot_chr17_window_1 :
## character(0)
##
## Models where empirical H_e is outside CI for Hotspot_chr17_window_1 :
## character(0)

## Models where empirical H_e is within CI for Hotspot_chr19_window_1 :
## character(0)
##
## Models where empirical H_e is outside CI for Hotspot_chr19_window_1 :
## character(0)
5.2 5th percentile of the extreme H_e values